library(tidyverse)
library(ggplot2)
library(ggsci)

net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/results/mic/"
expression_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/input/"

# BN folders 
data_prefix = "mic_m46"
bn_results_dir = "/pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_mic_res/mic_modules/mic_m46/"
BN_run_dir = "/pastel/resources/bayesian_networks/CINDERellA/"
BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")

Expression data

mod2plot = 46 # Module we want to plot

# Expression data for a single set:
exprData = read.table(paste0(expression_dir, "mic.txt"), header = T, stringsAsFactors = F, check.names = F)
expr_matx = as.data.frame(exprData) # Residuals of the expression
gene_modules = read.table(paste0(net_dir, "geneBycluster.txt"), header = T, stringsAsFactors = F) # clusters from SpeakEasy

# Select the expression values for the module of interest 
to_plot = gene_modules$ensembl[gene_modules$cluster_lv3 == mod2plot]
expr_matx_mod = expr_matx[to_plot, ]

# Save the input expression for the BN
# save(expr_matx_mod, file = paste0(bn_results_dir,data_prefix,"_dataExp.RData"))
# write_csv(as.data.frame(expr_matx_mod), file = BN_run_data, col_names = F)
# CINDERellA has two inputs: exp.txt and output_folder 
setwd(BN_run_dir)
cmd_matlab_call = paste0("matlab -nodisplay -nojvm -nosplash -nodesktop -r")
cmd_matlab_param = paste0("runt=1000; data='",BN_run_data,"'; out_dir='",BN_output_dir,"'; run('",BN_run_dir,"CINDERellA.m')")
cmd_matlab_run = paste0(cmd_matlab_call, " \"",cmd_matlab_param,"\"")
cat(cmd_matlab_run)
system(cmd_matlab_run)
# Results are saved here:
print(BN_output_dir)

Read results

Edges filtered

# BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
# BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")

# load(file = paste0(bn_results_dir,data_prefix,"_BN_input.RData")) # phenotype_match,gene_net_metadata,mod_eigengen
# load(file = paste0(bn_results_dir,data_prefix,"_dataExp.RData"))

edgefrq = read_tsv(paste0(BN_output_dir,"/edgefrq.txt"), col_names = c("A","B","freq"), show_col_types = F)

dataExp = expr_matx_mod
edges_df = na.omit(cbind(edgefrq, rownames(dataExp)[edgefrq$A], rownames(dataExp)[edgefrq$B]))
colnames(edges_df) = c("fromNode", "toNode", "weight", "fromAltName", "toAltName") # match names for Cytoscape input

edges_df$fromAltName = gsub("(.*)\\.(.*)","\\1",edges_df$fromAltName)
edges_df$toAltName = gsub("(.*)\\.(.*)","\\1",edges_df$toAltName)

edges_filtered = edges_df[abs(edges_df$weight)>0.4, ] # weight default = 0.33
rownames(edges_filtered) = NULL

createDT(edges_filtered %>% arrange(-weight))

Nodes filtered

nodes_table = data.frame(nodeName = 1:nrow(dataExp), altName = gsub("(.*)\\.(.*)","\\1",rownames(dataExp))) %>% distinct()
nodes_table$altName = gsub("(.*)\\.(.*)","\\1",nodes_table$altName)
rownames(nodes_table) = NULL
nodes_table = na.omit(unique(nodes_table)) %>% left_join(unique(gene_modules[,c("ensembl","gene_name")]), by = c("altName"="ensembl"))

nodes_filtered = nodes_table[nodes_table$altName %in% unique(c(edges_filtered$fromAltName,edges_filtered$toAltName)), ]
nodes_filtered = nodes_filtered[! duplicated(nodes_filtered$altName), ] 

createDT(nodes_filtered)

BN plot

plot_geneBN(edges_filtered = edges_filtered,
          nodes_filtered = nodes_filtered)

Session

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gtools_3.9.4       ggforce_0.3.3      graphlayouts_0.8.0 ggraph_2.0.5      
##  [5] igraph_2.0.3       ggsci_3.0.3        lubridate_1.9.3    forcats_1.0.0     
##  [9] stringr_1.5.1      dplyr_1.1.4        purrr_1.0.2        readr_2.1.5       
## [13] tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.0      tidyverse_2.0.0   
## 
## loaded via a namespace (and not attached):
##  [1] ggrepel_0.9.5     Rcpp_1.0.12       digest_0.6.35     utf8_1.2.4       
##  [5] R6_2.5.1          evaluate_0.23     highr_0.10        pillar_1.9.0     
##  [9] rlang_1.1.3       rstudioapi_0.16.0 jquerylib_0.1.4   DT_0.30          
## [13] rmarkdown_2.26    labeling_0.4.3    htmlwidgets_1.6.4 polyclip_1.10-6  
## [17] bit_4.0.5         munsell_0.5.1     compiler_4.1.2    xfun_0.43        
## [21] pkgconfig_2.0.3   htmltools_0.5.8.1 tidyselect_1.2.1  gridExtra_2.3    
## [25] viridisLite_0.4.2 fansi_1.0.6       crayon_1.5.2      tzdb_0.4.0       
## [29] withr_3.0.0       MASS_7.3-54       grid_4.1.2        jsonlite_1.8.8   
## [33] gtable_0.3.4      lifecycle_1.0.4   magrittr_2.0.3    scales_1.3.0     
## [37] cli_3.6.2         stringi_1.8.3     vroom_1.6.5       cachem_1.0.8     
## [41] farver_2.1.1      viridis_0.6.2     bslib_0.7.0       generics_0.1.3   
## [45] vctrs_0.6.5       tools_4.1.2       bit64_4.0.5       glue_1.7.0       
## [49] tweenr_1.0.2      hms_1.1.3         crosstalk_1.2.1   shadowtext_0.1.1 
## [53] parallel_4.1.2    fastmap_1.1.1     yaml_2.3.8        timechange_0.3.0 
## [57] colorspace_2.1-0  tidygraph_1.2.0   knitr_1.46        sass_0.4.9